import numpy as np # used for handling numbers
import pandas as pd # used for handling the dataset
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
%matplotlib inline
import seaborn as sns #data visualization library
import warnings
warnings.filterwarnings('ignore')
# Load the raw military-expenditure table and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='Military Expenditure.csv')
df.head(n=5)
| Name | Code | Type | Indicator Name | 1960 | 1961 | 1962 | 1963 | 1964 | 1965 | ... | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aruba | ABW | Country | Military expenditure (current USD) | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | Afghanistan | AFG | Country | Military expenditure (current USD) | NaN | NaN | NaN | NaN | NaN | NaN | ... | 2.518695e+08 | 2.981469e+08 | 3.258070e+08 | 2.385834e+08 | 2.171941e+08 | 2.682271e+08 | 1.995186e+08 | 1.858783e+08 | 1.914071e+08 | 1.980863e+08 |
| 2 | Angola | AGO | Country | Military expenditure (current USD) | NaN | NaN | NaN | NaN | NaN | NaN | ... | 3.311193e+09 | 3.500795e+09 | 3.639496e+09 | 4.144635e+09 | 6.090752e+09 | 6.841864e+09 | 3.608299e+09 | 2.764055e+09 | 3.062873e+09 | 1.983614e+09 |
| 3 | Albania | ALB | Country | Military expenditure (current USD) | NaN | NaN | NaN | NaN | NaN | NaN | ... | 1.827369e+08 | 1.858932e+08 | 1.970068e+08 | 1.832047e+08 | 1.800155e+08 | 1.781204e+08 | 1.323507e+08 | 1.308532e+08 | 1.443827e+08 | 1.804887e+08 |
| 4 | Andorra | AND | Country | Military expenditure (current USD) | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 63 columns
# Size of the loaded frame as a (rows, columns) tuple.
df.shape
(264, 63)
# Percentage share of each entry kind found in the 'Type' column.
df['Type'].value_counts(normalize=True).mul(100)
Country 76.515152 Regions Clubbed Economically 12.500000 Semi Autonomous Region 6.818182 Regions Clubbed Geographically 4.166667 Name: Type, dtype: float64
# Missing-value count per year column, as a single-row frame.
# The first four columns (Name, Code, Type, Indicator Name) are metadata, so
# the year columns start at position 4; starting at 5 would drop 1960.
df_null_check = df.isnull().sum().iloc[4:].to_frame().transpose()

# Percentage (0-100) of missing values for every column, one row per column.
series_null_percent = df.isnull().mean() * 100
df_null_percent = series_null_percent.to_frame(name="null_percent")
df_null_percent
| null_percent | |
|---|---|
| Name | 0.000000 |
| Code | 0.000000 |
| Type | 0.000000 |
| Indicator Name | 0.000000 |
| 1960 | 70.075758 |
| ... | ... |
| 2014 | 23.106061 |
| 2015 | 25.000000 |
| 2016 | 25.378788 |
| 2017 | 26.136364 |
| 2018 | 25.757576 |
63 rows × 1 columns
# One-row heatmap of the missing-value percentage for each year column;
# the four leading metadata columns are skipped.
fig, ax = plt.subplots(figsize=(30, 1))
ax.tick_params(labelsize=20)
null_percent_by_year = df_null_percent.transpose().iloc[:, 4:]
sns.heatmap(null_percent_by_year, cmap='coolwarm', cbar=False)
plt.yticks(rotation=0)
plt.xticks(rotation=45)
plt.show()
# Add a per-row 'Total' column holding the sum of all yearly values.
# numeric_only=True restricts the reduction to the year columns; without it
# the text columns (Name, Code, Type, Indicator Name) take part in the sum,
# which fails on recent pandas (2.x) where numeric_only defaults to False.
# NaN years are skipped, so a row with no data at all gets a total of 0.
df1 = df.assign(Total=df.sum(axis=1, numeric_only=True))
df1.head(5)
| Name | Code | Type | Indicator Name | 1960 | 1961 | 1962 | 1963 | 1964 | 1965 | ... | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | Total | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aruba | ABW | Country | Military expenditure (current USD) | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000e+00 |
| 1 | Afghanistan | AFG | Country | Military expenditure (current USD) | NaN | NaN | NaN | NaN | NaN | NaN | ... | 2.981469e+08 | 3.258070e+08 | 2.385834e+08 | 2.171941e+08 | 2.682271e+08 | 1.995186e+08 | 1.858783e+08 | 1.914071e+08 | 1.980863e+08 | 3.239602e+09 |
| 2 | Angola | AGO | Country | Military expenditure (current USD) | NaN | NaN | NaN | NaN | NaN | NaN | ... | 3.500795e+09 | 3.639496e+09 | 4.144635e+09 | 6.090752e+09 | 6.841864e+09 | 3.608299e+09 | 2.764055e+09 | 3.062873e+09 | 1.983614e+09 | 7.011220e+10 |
| 3 | Albania | ALB | Country | Military expenditure (current USD) | NaN | NaN | NaN | NaN | NaN | NaN | ... | 1.858932e+08 | 1.970068e+08 | 1.832047e+08 | 1.800155e+08 | 1.781204e+08 | 1.323507e+08 | 1.308532e+08 | 1.443827e+08 | 1.804887e+08 | 3.054373e+09 |
| 4 | Andorra | AND | Country | Military expenditure (current USD) | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000e+00 |
5 rows × 64 columns
Missing values are filled with 0 below to make sure that plotting raises no errors.
Why didn't we impute?
Imputing factual military data, which is itself a sensitive topic, could have led to misleading results. We don't want data that will hinder our actions just because the analysis was wrong. "I would rather be uninformed than misinformed."
# Replace every remaining NaN with 0 and preview the result.
df1 = df1.fillna(0)
df1.head(n=5)
| Name | Code | Type | Indicator Name | 1960 | 1961 | 1962 | 1963 | 1964 | 1965 | ... | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | Total | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aruba | ABW | Country | Military expenditure (current USD) | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 |
| 1 | Afghanistan | AFG | Country | Military expenditure (current USD) | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 2.981469e+08 | 3.258070e+08 | 2.385834e+08 | 2.171941e+08 | 2.682271e+08 | 1.995186e+08 | 1.858783e+08 | 1.914071e+08 | 1.980863e+08 | 3.239602e+09 |
| 2 | Angola | AGO | Country | Military expenditure (current USD) | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 3.500795e+09 | 3.639496e+09 | 4.144635e+09 | 6.090752e+09 | 6.841864e+09 | 3.608299e+09 | 2.764055e+09 | 3.062873e+09 | 1.983614e+09 | 7.011220e+10 |
| 3 | Albania | ALB | Country | Military expenditure (current USD) | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1.858932e+08 | 1.970068e+08 | 1.832047e+08 | 1.800155e+08 | 1.781204e+08 | 1.323507e+08 | 1.308532e+08 | 1.443827e+08 | 1.804887e+08 | 3.054373e+09 |
| 4 | Andorra | AND | Country | Military expenditure (current USD) | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 | 0.000000e+00 |
5 rows × 64 columns
# Convert the yearly values and the total from USD to billions of USD, then
# round to two decimals. All year columns are divided in one vectorised step
# instead of a per-column loop.
columns = [str(year) for year in range(1960, 2019)] + ["Total"]
df1[columns] = df1[columns] / 1.e+9
df1 = df1.round(decimals=2)
df1.head()
| Name | Code | Type | Indicator Name | 1960 | 1961 | 1962 | 1963 | 1964 | 1965 | ... | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | Total | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aruba | ABW | Country | Military expenditure (current USD) | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| 1 | Afghanistan | AFG | Country | Military expenditure (current USD) | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.30 | 0.33 | 0.24 | 0.22 | 0.27 | 0.20 | 0.19 | 0.19 | 0.20 | 3.24 |
| 2 | Angola | AGO | Country | Military expenditure (current USD) | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 3.50 | 3.64 | 4.14 | 6.09 | 6.84 | 3.61 | 2.76 | 3.06 | 1.98 | 70.11 |
| 3 | Albania | ALB | Country | Military expenditure (current USD) | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.19 | 0.20 | 0.18 | 0.18 | 0.18 | 0.13 | 0.13 | 0.14 | 0.18 | 3.05 |
| 4 | Andorra | AND | Country | Military expenditure (current USD) | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
5 rows × 64 columns
# Order rows by type and then by total spending (both descending), keep only
# rows whose Type mentions "Country", and take the first 20 of them.
df1 = df1.sort_values(by=['Type', 'Total'], ascending=[False, False])
is_country = df1['Type'].str.contains("Country")
df1 = df1[is_country]
df2 = df1.head(20)

# Slimmed-down copy without the metadata columns, re-indexed 0..19.
df3 = df2.drop(columns=['Indicator Name', 'Code', 'Type'])
new = df3.reset_index(drop=True)
new.head(20)
| Name | 1960 | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | 1967 | 1968 | ... | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | Total | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | United States | 45.38 | 47.81 | 52.38 | 52.30 | 51.21 | 51.83 | 63.57 | 75.45 | 80.73 | ... | 698.00 | 711.00 | 685.00 | 640.00 | 610.00 | 596.00 | 600.00 | 606.00 | 649.00 | 17284.12 |
| 1 | China | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 116.00 | 138.00 | 157.00 | 180.00 | 201.00 | 214.00 | 216.00 | 228.00 | 250.00 | 2368.37 |
| 2 | France | 3.88 | 4.13 | 4.49 | 4.63 | 4.92 | 5.12 | 5.41 | 5.86 | 6.13 | ... | 61.78 | 64.60 | 60.04 | 62.42 | 63.61 | 55.34 | 57.36 | 60.42 | 63.80 | 1927.28 |
| 3 | United Kingdom | 4.59 | 4.75 | 5.01 | 5.20 | 5.51 | 5.79 | 5.98 | 6.20 | 5.56 | ... | 58.08 | 60.27 | 58.50 | 56.86 | 59.18 | 53.86 | 48.12 | 46.43 | 50.00 | 1801.94 |
| 4 | Germany | 2.88 | 3.27 | 4.31 | 4.98 | 4.89 | 4.98 | 5.06 | 5.35 | 4.83 | ... | 46.26 | 48.14 | 46.47 | 45.93 | 46.10 | 39.81 | 41.58 | 45.38 | 49.47 | 1611.06 |
| 5 | Japan | 0.48 | 0.49 | 0.54 | 0.63 | 0.72 | 0.82 | 0.93 | 1.04 | 1.17 | ... | 54.66 | 60.76 | 60.01 | 49.02 | 46.88 | 42.11 | 46.47 | 45.39 | 46.62 | 1511.24 |
| 6 | Saudi Arabia | 0.14 | 0.07 | 0.08 | 0.11 | 0.12 | 0.18 | 0.30 | 0.34 | 0.34 | ... | 45.24 | 48.53 | 56.50 | 67.02 | 80.76 | 87.19 | 63.67 | 70.40 | 67.55 | 1241.87 |
| 7 | Russian Federation | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 58.72 | 70.24 | 81.47 | 88.35 | 84.70 | 66.42 | 69.25 | 66.53 | 61.39 | 1014.83 |
| 8 | India | 0.68 | 0.75 | 1.07 | 1.80 | 1.99 | 2.13 | 1.66 | 1.49 | 1.59 | ... | 46.09 | 49.63 | 47.22 | 47.40 | 50.91 | 51.30 | 56.64 | 64.56 | 66.51 | 935.60 |
| 9 | Italy | 1.01 | 1.06 | 1.22 | 1.46 | 1.59 | 1.72 | 1.90 | 1.93 | 1.99 | ... | 32.02 | 33.83 | 29.78 | 29.96 | 27.70 | 22.18 | 25.03 | 26.45 | 27.81 | 903.51 |
| 10 | South Korea | 0.28 | 0.16 | 0.19 | 0.19 | 0.14 | 0.13 | 0.18 | 0.22 | 0.28 | ... | 28.18 | 30.99 | 31.95 | 34.31 | 37.55 | 36.57 | 36.89 | 39.17 | 43.07 | 719.98 |
| 11 | Brazil | 0.38 | 0.34 | 0.39 | 0.44 | 0.35 | 0.63 | 0.72 | 0.75 | 0.76 | ... | 34.00 | 36.94 | 33.99 | 32.87 | 32.66 | 24.62 | 24.22 | 29.28 | 27.77 | 593.44 |
| 12 | Canada | 1.70 | 1.68 | 1.67 | 1.61 | 1.66 | 1.57 | 1.61 | 1.78 | 1.80 | ... | 19.32 | 21.39 | 20.45 | 18.52 | 17.85 | 17.94 | 17.78 | 21.34 | 21.62 | 522.83 |
| 13 | Spain | 0.23 | 0.24 | 0.31 | 0.32 | 0.39 | 0.44 | 0.56 | 0.55 | 0.67 | ... | 19.71 | 19.70 | 18.86 | 17.24 | 17.18 | 15.19 | 14.01 | 16.04 | 18.25 | 509.99 |
| 14 | Australia | 0.46 | 0.47 | 0.49 | 0.55 | 0.66 | 0.79 | 0.99 | 1.20 | 1.32 | ... | 23.22 | 26.60 | 26.22 | 24.83 | 25.78 | 24.05 | 26.38 | 27.69 | 26.71 | 508.91 |
| 15 | Iran | 0.10 | 0.11 | 0.11 | 0.13 | 0.16 | 0.20 | 0.26 | 0.33 | 0.44 | ... | 13.56 | 14.28 | 16.49 | 12.00 | 9.90 | 10.59 | 12.26 | 13.93 | 13.19 | 433.48 |
| 16 | Israel | 0.19 | 0.24 | 0.21 | 0.26 | 0.28 | 0.31 | 0.38 | 0.61 | 0.69 | ... | 14.61 | 16.34 | 15.57 | 17.32 | 18.49 | 16.97 | 14.78 | 15.58 | 15.95 | 430.63 |
| 17 | Turkey | 0.47 | 0.30 | 0.33 | 0.35 | 0.38 | 0.42 | 0.44 | 0.51 | 0.57 | ... | 17.94 | 17.30 | 17.96 | 18.66 | 17.77 | 15.88 | 17.85 | 17.82 | 18.97 | 400.51 |
| 18 | Poland | 3.55 | 4.03 | 4.38 | 4.95 | 5.20 | 5.52 | 5.98 | 6.28 | 7.22 | ... | 8.79 | 9.46 | 8.99 | 9.28 | 10.35 | 10.21 | 9.16 | 9.87 | 11.60 | 357.68 |
| 19 | Netherlands | 0.45 | 0.55 | 0.60 | 0.64 | 0.74 | 0.75 | 0.77 | 0.88 | 0.91 | ... | 11.22 | 11.65 | 10.36 | 10.23 | 10.33 | 8.67 | 9.12 | 9.58 | 11.24 | 343.92 |
20 rows × 61 columns
# Horizontal bar chart of total 1960-2018 spending for the selected countries.
# df3 has one row per country; 'Total' was scaled to billions of USD earlier.
plt.figure(figsize=(12, 8))
sns.barplot(x='Total', y='Name', data=df3)
plt.title('Total Military Spending from 1960 to 2018')
plt.xlabel('Total in billions USD')
plt.ylabel('Countries')
plt.grid()
# Render explicitly, as the other matplotlib cells in this notebook do.
plt.show()
# Pie charts of each selected country's share: whole period first, then 1960.
pie_specs = [
    ('Total', 'Total military spendings in percentage from 1960 to 2018 '),
    ('1960', 'Military spendings in percentage in 1960'),
]
for value_column, chart_title in pie_specs:
    fig = px.pie(df2, values=value_column, names='Name', title=chart_title)
    fig.show()

# World map with one marker per selected country, sized by its 2018 value.
fig = px.scatter_geo(df2, locations='Code', hover_name="Name", size='2018')
fig.update_layout(title="First 20 most powerful country")
fig.show()
# Reshape the top-20 table so that years are rows and countries are columns.
df4 = df3.drop(columns=['Total'])
Top20 = df4.set_index('Name').rename_axis('Year').T
Top20.head()
| Year | United States | China | France | United Kingdom | Germany | Japan | Saudi Arabia | Russian Federation | India | Italy | South Korea | Brazil | Canada | Spain | Australia | Iran | Israel | Turkey | Poland | Netherlands |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1960 | 45.38 | 0.0 | 3.88 | 4.59 | 2.88 | 0.48 | 0.14 | 0.0 | 0.68 | 1.01 | 0.28 | 0.38 | 1.70 | 0.23 | 0.46 | 0.10 | 0.19 | 0.47 | 3.55 | 0.45 |
| 1961 | 47.81 | 0.0 | 4.13 | 4.75 | 3.27 | 0.49 | 0.07 | 0.0 | 0.75 | 1.06 | 0.16 | 0.34 | 1.68 | 0.24 | 0.47 | 0.11 | 0.24 | 0.30 | 4.03 | 0.55 |
| 1962 | 52.38 | 0.0 | 4.49 | 5.01 | 4.31 | 0.54 | 0.08 | 0.0 | 1.07 | 1.22 | 0.19 | 0.39 | 1.67 | 0.31 | 0.49 | 0.11 | 0.21 | 0.33 | 4.38 | 0.60 |
| 1963 | 52.30 | 0.0 | 4.63 | 5.20 | 4.98 | 0.63 | 0.11 | 0.0 | 1.80 | 1.46 | 0.19 | 0.44 | 1.61 | 0.32 | 0.55 | 0.13 | 0.26 | 0.35 | 4.95 | 0.64 |
| 1964 | 51.21 | 0.0 | 4.92 | 5.51 | 4.89 | 0.72 | 0.12 | 0.0 | 1.99 | 1.59 | 0.14 | 0.35 | 1.66 | 0.39 | 0.66 | 0.16 | 0.28 | 0.38 | 5.20 | 0.74 |
# Line chart of yearly spending, one line per column (country) of Top20.
plt.figure(figsize=(20, 10))
years = Top20.index
spending_by_country = Top20.values
plt.plot(years, spending_by_country)
plt.title('Top 20 Countries in Military Expenditure ')
plt.ylabel('Spendings through year')
plt.xticks(rotation=45)
plt.legend(Top20.columns)
plt.grid(True)
plt.show()
# Percentage growth in spending between each country's base year and the last
# year in Top20, for every country in the table.
#
# Most countries use the first row (1960) as the base. China and the Russian
# Federation are zero in the early years shown above, so they use the later
# row positions chosen by the original analysis (presumably their first
# non-zero years - confirm against the data).
base_positions = {'China': 29, 'Russian Federation': 33}


def _growth_percent(series, base_pos=0):
    """Return the percent change from series.iloc[base_pos] to the last value."""
    base = series.iloc[base_pos]
    return (series.iloc[-1] - base) * 100 / base


# Iterating Top20.columns keeps the rows in the same order as the ranked
# top-20 table, which the later index-based join with the totals relies on.
data = [
    [country, _growth_percent(Top20[country], base_positions.get(country, 0))]
    for country in Top20.columns
]
percdf = pd.DataFrame(data, columns=['Country', 'Percentage growth'])
percdf.head(20)
| Country | Percentage growth | |
|---|---|---|
| 0 | United States | 1330.145439 |
| 1 | China | 2092.982456 |
| 2 | France | 1544.329897 |
| 3 | United Kingdom | 989.324619 |
| 4 | Germany | 1617.708333 |
| 5 | Japan | 9612.500000 |
| 6 | Saudi Arabia | 48150.000000 |
| 7 | Russian Federation | 690.090090 |
| 8 | India | 9680.882353 |
| 9 | Italy | 2653.465347 |
| 10 | South Korea | 15282.142857 |
| 11 | Brazil | 7207.894737 |
| 12 | Canada | 1171.764706 |
| 13 | Spain | 7834.782609 |
| 14 | Australia | 5706.521739 |
| 15 | Iran | 13090.000000 |
| 16 | Israel | 8294.736842 |
| 17 | Turkey | 3936.170213 |
| 18 | Poland | 226.760563 |
| 19 | Netherlands | 2397.777778 |
# Plotly bar chart of the percentage growth computed for each country.
fig = px.bar(
    percdf,
    x='Country',
    y='Percentage growth',
    title='Percentage growth of the top 20 countries',
)
fig.show()

# Attach the 'Total' column of `new`; rows are matched on the index.
total_column = new['Total']
model = percdf.join(total_column)
model
| Country | Percentage growth | Total | |
|---|---|---|---|
| 0 | United States | 1330.145439 | 17284.12 |
| 1 | China | 2092.982456 | 2368.37 |
| 2 | France | 1544.329897 | 1927.28 |
| 3 | United Kingdom | 989.324619 | 1801.94 |
| 4 | Germany | 1617.708333 | 1611.06 |
| 5 | Japan | 9612.500000 | 1511.24 |
| 6 | Saudi Arabia | 48150.000000 | 1241.87 |
| 7 | Russian Federation | 690.090090 | 1014.83 |
| 8 | India | 9680.882353 | 935.60 |
| 9 | Italy | 2653.465347 | 903.51 |
| 10 | South Korea | 15282.142857 | 719.98 |
| 11 | Brazil | 7207.894737 | 593.44 |
| 12 | Canada | 1171.764706 | 522.83 |
| 13 | Spain | 7834.782609 | 509.99 |
| 14 | Australia | 5706.521739 | 508.91 |
| 15 | Iran | 13090.000000 | 433.48 |
| 16 | Israel | 8294.736842 | 430.63 |
| 17 | Turkey | 3936.170213 | 400.51 |
| 18 | Poland | 226.760563 | 357.68 |
| 19 | Netherlands | 2397.777778 | 343.92 |
# Bubble chart: percentage growth (log-scaled x) against total spending,
# with bubble size following the total.
scatter_options = dict(
    x="Percentage growth",
    y="Total",
    color="Country",
    size='Total',
    hover_name="Country",
    log_x=True,
    size_max=60,
)
fig = px.scatter(model, **scatter_options)
fig.show()

# Sunburst: country -> growth value, sector size by total, colour by growth.
sunburst_options = dict(
    path=['Country', 'Percentage growth'],
    values='Total',
    color='Percentage growth',
    color_continuous_scale='rdbu',
)
fig = px.sunburst(model, **sunburst_options)
fig.update_layout(margin=dict(t=10, l=0, r=0, b=0))
fig.show()
# Extract India's row from the raw frame, drop the metadata columns and use
# the country name as the index.
india_rows = df.loc[df['Name'] == "India"]
df_ind = india_rows.drop(columns=["Code", "Type", "Indicator Name"]).set_index("Name")
df_ind
| 1960 | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | 1967 | 1968 | 1969 | ... | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Name | |||||||||||||||||||||
| India | 681765681.8 | 748388248.4 | 1.065436e+09 | 1.795449e+09 | 1.986654e+09 | 2.125990e+09 | 1.661155e+09 | 1.487733e+09 | 1.586867e+09 | 1.691433e+09 | ... | 3.872215e+10 | 4.609045e+10 | 4.963382e+10 | 4.721692e+10 | 4.740353e+10 | 5.091411e+10 | 5.129548e+10 | 5.663762e+10 | 6.455944e+10 | 6.651029e+10 |
1 rows × 59 columns
# Total number of missing values across India's row.
df_ind.isna().sum().sum()
0
# Put the years on the index and scale the values down by 10**9 (billions).
df_ind = df_ind.transpose().div(10**9)

# Text styles for the title and the axis labels.
font1 = {'family': 'times new roman', 'color': 'blue', 'size': 40}
font2 = {'family': 'serif', 'color': '#D35400', 'size': 25}

# Line chart of India's yearly military expenditure on an enlarged axes.
fig = plt.figure()
ax = fig.add_axes([0, 0, 2.2, 1.5])
plt.plot(df_ind['India'], color='#0B5345')
plt.title("India's military Budget in Billion USD$", fontdict=font1)
plt.xlabel('Years', fontdict=font2)
plt.ylabel('Expenditure', fontdict=font2)
plt.xticks(rotation=45)
plt.grid()
plt.show()
# One bar chart per decade of India's yearly military budget, on a 2 x 3 grid
# that shares the y-axis so decades are directly comparable.
decades = ["196", "197", "198", "199", "200", "201"]
rows = 2
cols = 3
fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=(20,11), sharey=True)
fig.suptitle("Indian military budget by year in billions of US$", fontsize=40, color = '#800000')
for i, decade in enumerate(decades):
    # Map the running index onto a (row, column) position in the grid.
    a, b = divmod(i, cols)
    # df_ind is indexed by year strings here, so the prefix selects the
    # rows that belong to this decade.
    df_decade = df_ind[df_ind.index.str.startswith(decade)]
    # df_ind was already divided by 10**9 above; dividing again here would
    # shrink the bars by nine further orders of magnitude.
    df_decade.plot.bar(ax=axes[a,b], legend=False, rot=45,color = '#5B2C6F')
plt.show()
# Split India's series into the years before 2000 and from 2000 onwards.
# Both results keep the years on the index and a single 'India' column.
_df = df_ind.transpose()
df_pre_2k = _df.loc[:, _df.columns.str.startswith("19")].transpose()
df_pos_2k = _df.loc[:, _df.columns.str.startswith("20")].transpose()
# df_ind was already divided by 10**9 earlier, so the pre-2000 series is
# plotted as-is rather than being divided a second time.
df_pre_2k.plot(figsize=(12,8), legend=False, rot=45, color = '#5B2C6F');
# Bar chart of India's yearly spending for 1984-1999.
_years = tuple(str(y) for y in range(1984, 2000))
# Select exactly the listed years from the pre-2000 series. No further
# scaling is applied: the values were already divided by 10**9 earlier, and
# the extra /10 and /10**9 previously applied here made the bars disagree
# with the "billions" in the title.
df_80s = df_pre_2k.loc[df_pre_2k.index.isin(_years)]
df_80s.plot.bar(figsize=(10,8), legend=False, rot=0, title="Military spending in India in billions of US$");
# Year-over-year relative change of India's spending across df_80s.
pct_serie = df_80s['India'].pct_change()
# The first entry has no predecessor (NaN), so it is skipped. Bars are green
# for growth and red ('C3') otherwise.
pct_serie[1:].multiply(100).plot.bar(figsize=(10,8), rot=0, color=(pct_serie[1:] > 0).map({True: 'g', False: 'C3'}));
# df_80s covers 1984-1999, so the plotted changes run from 1985 to 1999.
plt.title("Percentage change in military spending in India from 1985 to 1999", fontsize=15, color = '#800000', fontweight='bold')
plt.xlabel('Years', fontsize=20 )
plt.ylabel('Growth', fontsize=20)
plt.show()
# Relative change of India's budget from 1984 to 1987 (a fraction, not a
# percentage). .iloc[1] picks the second entry by position; plain [1] on a
# Series indexed by year labels relies on deprecated positional fallback.
budgets_comp = df_80s.loc[["1984", "1987"], "India"].pct_change().iloc[1]
budgets_comp
0.5635910139269893
India gained its independence in 1947. During the first two decades we were recovering heavily from economic and structural damage, but even then we started focusing on the military side of the country.
-> 24 May 1985 - The Terrorist and Disruptive Activities (Prevention) Act came into effect after presidential assent.
-> 30 April 1986 – Operation Black Thunder conducted to flush out remaining Sikh extremists from the Golden Temple.
-> 1991 Economic Crisis
-> 1996 A devastating Category 4 Cyclone strikes Andhra Pradesh, India. The storm surge sweeps fishing villages out to sea, over 2,000 people die. 95% of the crops are completely destroyed.
-> 1998 - First atomic test of India (Pokhran, Rajasthan). Over 1,120 people are killed when a cyclone hits coastal areas of the western state of Gujarat.
# Line chart of India's spending from 2000 onwards.
# df_pos_2k derives from df_ind, which was already divided by 10**9, so it is
# plotted without a second division to match the "billions" in the title.
df_pos_2k.plot(figsize=(12,8), legend=False, color = '#800000');
# adding title and axis names
plt.title("India's military budget in billions of US$",fontsize=25, color = '#800000', fontweight='bold')
plt.xlabel('Years', fontsize=20 )
plt.ylabel('Expenditure', fontsize=20)
plt.show()
# Bar chart of India's yearly spending for 2000-2018.
_years = tuple(str(y) for y in range(2000, 2019))
# Keep the rows whose year label starts with one of the listed years. The
# values were already divided by 10**9 earlier, so no second division here.
df_2ks = df_pos_2k.loc[df_pos_2k.index.str.startswith(_years)]
df_2ks.plot.bar(figsize=(10,8), legend=False,rot = 0,);
plt.title("Military spending in billions of US$",fontsize=25, color = '#800000', fontweight='bold')
plt.xlabel('Years', fontsize=20 )
plt.ylabel('Expenditure', fontsize=20)
plt.show()
# Year-over-year relative change of the first row of the transposed df_2ks.
pct_serie = df_2ks.T.iloc[0].pct_change()

# Skip the leading NaN; colour each bar green when positive, red otherwise.
changes = pct_serie[1:]
bar_colors = (changes > 0).map({True: 'g', False: 'r'})
changes.multiply(100).plot.bar(figsize=(10, 8), rot=0, color=bar_colors);
plt.title(
    "Percentage change in military spending in India from 2001 to 2018",
    fontsize=15,
    color='#800000',
    fontweight='bold',
)
plt.xlabel('Years', fontsize=20)
plt.ylabel('Growth', fontsize=20)
plt.show()
-> 2002 - 2003 Finance Minister Yashwant Sinha presents the 2002–03 budget. Amongst its major features are a 4.8% increase in defence spending and a 5% surcharge on income tax to pay for this.
-> 2005 Mumbai and the Mumbai Conurbation area is submerged in 5–7 ft. of water due to heavy rains, making nearby dams release water causing a massive flood, which virtually stops the financial capital of India for 4–5 days.
-> 2006 A terrorist attack in Varanasi killed at least 15 people and left more than 50 injured. A series of coordinated bomb attacks struck several commuter trains in Mumbai, India during the evening rush hour.
-> 2012 Major fire in the signalling system at Kurla brings Central line and Harbour line of Mumbai Suburban Railway to a standstill and two commuters lose their lives after falling off an overcrowded local train. The loss was expected to be ₹170 million (US$2.1 million) to the Central Railway. A pair of great earthquakes occur in the Wharton Basin west of Sumatra in Indonesia. The maximum Mercalli intensity of this strike-slip doublet earthquake was VII (Very strong). Ten were killed, twelve were injured, and a non-destructive tsunami was observed on the island of Nias.
-> 2015 ISRO launched 5 UK satellites with a total payload of 1,440 kg using the PSLV-C28 launch vehicle.
-> During the remaining years, growth was fairly normal.
Our last available data, for the year 2018, shows that the military budget for that year was around 66.5 billion US dollars.
# India's value for 2018 (df_ind was divided by 10**9 earlier).
df_ind.loc["2018", "India"]
66.510289108
In December 2017, the US dollar to Indian rupee conversion rate was around 1 / 65.0966, meaning 1 US dollar was worth around 65.0966 Indian rupees. This means that the military budget for that year was higher than 4329 billion Indian rupees.
# India's 2018 value multiplied by the 65.0966 conversion factor.
military_2018 = df_ind.loc["2018", "India"] * 65.0966
military_2018
4329.5936859478325
# 2018 health and education figures, sourced from the internet
# (presumably in the same unit as military_2018 - confirm).
health_2018 = 5666.44
educat_2018 = 8459.26
series_2018 = pd.Series(
    {"Health": health_2018, "Education": educat_2018, "Military": military_2018}
)

# Left: the three budgets as bars, largest first. Right: their shares as a pie.
fig, axes = plt.subplots(ncols=2, figsize=(18, 6))
bar_ax, pie_ax = axes
fig.suptitle("Indian military x education x health budget - 2018", fontsize=25, color='#800000')
ranked_budgets = series_2018.sort_values(ascending=False)
ranked_budgets.plot.bar(ax=bar_ax, rot=0, color=['#0B5345', '#800000', '#5B2C6F'])
series_2018.plot.pie(
    ax=pie_ax,
    autopct="%1.2f%%",
    colors=['#800000', '#006400', '#5B2C6F'],
    fontsize=15,
    wedgeprops={"linewidth": 2, "edgecolor": "white"},
    textprops={"fontsize": 15},
    shadow=True,
    startangle=90,
);
# Remove the automatic y-label that the pie plot would otherwise show.
pie_ax.set_ylabel("");
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression,SGDRegressor
from sklearn.preprocessing import PolynomialFeatures,StandardScaler
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
# Build the modelling table: one row per year, one column per entity.
# Only rows of the raw frame without any missing value are kept.
df_ml = df.dropna().drop(columns=['Code', 'Type', 'Indicator Name'])
# Scale the yearly values down by 10**9 so the numbers are easier to read.
df_ml = df_ml.set_index('Name').div(10**9)
# Label the entity axis 'Srno', then flip so the year labels become rows.
df_ml = df_ml.rename_axis('Srno').transpose()
# Turn the year labels into a regular 'Year' column.
df_ml = df_ml.reset_index().rename(columns={'index': 'Year'})
# The labels come from the CSV header as strings; cast them to integers.
# The result must be assigned back - astype does not modify in place - which
# makes a hard-coded list of years unnecessary.
df_ml['Year'] = df_ml['Year'].astype('int64')
df_ml.dtypes
Srno
Year int64
Australia float64
Austria float64
Belgium float64
Burkina Faso float64
...
South Asia (IDA & IBRD) float64
Tunisia float64
Turkey float64
United States float64
South Africa float64
Length: 67, dtype: object
# Scatter plot of India's spending per year.
fig = plt.figure(figsize = (20,5))
# Plot against the 'Year' column so the x-axis matches its label; the frame's
# index is only a 0-based row counter.
plt.scatter(df_ml['Year'],df_ml['India'])
plt.xticks(rotation = 45)
plt.title('Expense through the year', fontsize = 30,color = 'blue',fontweight = 'bold')
plt.xlabel('Year',fontsize = 15, color = '#800080')
plt.ylabel('Expenses in billion', fontsize = 15, color = '#800080')
plt.show()
# Linear model fitted with stochastic gradient descent: India's spending as a
# function of the year.
X = df_ml['Year'].values.reshape(-1,1)
y = df_ml['India'].values.reshape(-1,1)
# Hold out 20% of the years for evaluation; the split is seeded.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state = 0)
# Standardise the year feature. The scaler is fitted on the training split
# only and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# random_state pins the data shuffling inside SGD so repeated runs give the
# same fit; without it the score below changes from run to run.
sgd_reg = SGDRegressor(max_iter = 1000,tol = 1e-3,penalty = None,eta0 = 0.1,random_state = 0)
sgd_reg.fit(X_train,y_train.ravel())
y_pred = sgd_reg.predict(X_test)
# R^2 (coefficient of determination) on the held-out years. This is not an
# accuracy: it can be negative when the model does worse than predicting the
# mean of the test targets.
r2_score(y_test,y_pred)
-1.0831786912998873
# Plot the SGD predictions (red line) against the held-out observations (blue).
plt.figure(figsize=(20,5))
# X_test was standardised for training; map it back to calendar years so the
# x-axis agrees with its 'Year' label.
years_test = scaler.inverse_transform(X_test)
# The test split is shuffled, so draw the line in chronological order.
order = years_test.ravel().argsort()
plt.plot(years_test[order],y_pred[order],color = 'red')
plt.scatter(years_test,y_test,color = 'blue')
plt.title('SGD regression',fontsize = 30, color = 'blue',fontweight = 'bold')
plt.xlabel('Year',fontsize = 15, color = '#800080')
plt.ylabel('Expenses in billion',fontsize = 15, color = '#800080')
plt.show()
# Degree-4 polynomial regression of India's spending on the year.
X = df_ml['Year'].to_numpy().reshape(-1, 1)
y = df_ml['India'].to_numpy().reshape(-1, 1)

# Same seeded 80/20 train/test split as used for the SGD model.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Polynomial feature expansion followed by ordinary least squares, chained
# in a pipeline so both steps are fitted together.
poly_reg = PolynomialFeatures(degree=4)
lin_reg = LinearRegression()
pipeline = Pipeline(steps=[('poly', poly_reg), ('lin_reg', lin_reg)])
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
X_poly = poly_reg.fit_transform(X_train)

# R^2 on the held-out years, expressed as a percentage.
score_percent = r2_score(y_test, y_pred) * 100
print("accuracy =", score_percent)
accuracy = 80.72113788055056
# Plot the fitted polynomial curve (blue) over all observations (red).
plt.figure(figsize=(20,5))
plt.scatter(X,y,color = 'red')
# Predict through the fitted pipeline, which applies the polynomial expansion
# and the linear model in one call, instead of refitting the transformer with
# fit_transform at prediction time.
plt.plot(X,pipeline.predict(X),color = 'blue')
plt.title('Polynomial Regression', fontsize = 30,color = 'blue',fontweight = 'bold')
plt.xlabel('Year' , fontsize = 15, color = '#800080')
plt.ylabel('Expenses in billion', fontsize = 15, color = '#800080')
plt.show()
# Predict the values for 2019 and 2020 with the fitted pipeline. The pipeline
# expands the year into polynomial features itself, so the transformer does
# not have to be refitted for every prediction.
for year in (2019, 2020):
    print(pipeline.predict([[year]]))
[[72.4069378]] [[77.22599783]]
# Plot the observations (red), the fitted curve (blue) and the 2019/2020
# predictions (green).
plt.figure(figsize=(20,5))
plt.scatter(X,y,color = 'red')
# All predictions go through the fitted pipeline rather than refitting the
# polynomial transformer with fit_transform on every call.
plt.plot(X,pipeline.predict(X),color = 'blue')
future_years = np.array([[2019],[2020]])
plt.scatter(future_years,pipeline.predict(future_years),color = 'green')
plt.title('Polynomial Regression', fontsize = 30,color = 'blue',fontweight = 'bold')
plt.xlabel('Year' , fontsize = 15, color = '#800080')
plt.ylabel('Expenses in billion', fontsize = 15, color = '#800080')
plt.show()